library(knitr) # for kable tables
library(performance) # for model checking
library(DHARMa) # for model checking
library(effects) # for nice summary of model results (allEffects)
library(broom.mixed) # for tidy output
library(ggplotify) # to use patchwork on lattice plots (created by allEffects)
library(MuMIn) # to calculate R2
library(pROC) # for ROC curves and AUC
library(patchwork) # for combining ggplot plots
library(tidyverse)
# Update this when have final results from workshop:
PGS_df <- read.table(file = "/path/PGS_Workshop/01_Create_PGS/Plink_PGS_scores/ILAE3_Caucasian_all_epilepsy_EUR.QC_PGS_scores.profile", header = T)
# Read in covariates data
cov_df <- read.table(file = "/path/PGS_Workshop/Genotype_data/EUR.cov", header = T)
# Read in ancestry PCs data
PCs_df <- read.table(file = "/path/PGS_Workshop/Genotype_data/EUR.eigenvec", header = F, col.names = c("FID", "IID", "PC1", "PC2", "PC3", "PC4", "PC5", "PC6"))
# Relatedness data
related_df <- read.table(file = "/path/PGS_Workshop/02_Evaluate_PGS/EUR_relatedness.genome", header = T)
# Merge these three dataframes (PGS, covariates and PCs) and add simulated data for our phenotype = epilepsy (case/control)
set.seed(123) # set seed for reproducibility
Df <- PGS_df %>%
left_join(cov_df) %>%
left_join(PCs_df) %>%
mutate(
lp = -1.5 + 0.8 * scale(SCORESUM) +
0.05 * Sex + 0.01 * PC1 + 0.005 * PC2 +
0.005 * PC3 + 0.00001 * PC4 + 0.0003 * PC5 + 0.00008 * PC6,
prob = plogis(lp),
Epilepsy = rbinom(n(), size = 1, prob = prob)) %>%
select(-c(lp, prob))
# Remove 1 individual from each pair that have pi_hat > 0.1875 with controls preferentially removed.
# This removes individuals that are first- and second-degress relatives (0.1875 is halfway between 2nd and 3rd degree relatives)
# Note in our data that related_df is empty - no individuals have pi_hat > 0.09 (this dataset was QCed and related individuals were already removed)
# So this code is commented out, but you can use it if your data includes related individuals
# # Df with case/ctrl status for each IID
# Epilepsy_status <- Df %>%
# select(IID, Epilepsy)
#
# # Join related df with CP Epilepsy_status so have case/control status for both IID1 and IID2
#
# related_epilepsy_status <- related_df %>%
# left_join(Epilepsy_status, by = c("IID1" = "IID")) %>%
# rename(Epilepsy_IID1 = Epilepsy) %>%
# left_join(Epilepsy_status, by = c("IID2" = "IID")) %>%
# rename(Epilepsy_IID2 = Epilepsy)
#
# # Filter pairs with PI_HAT > 0.1875, preferentially removing controls and keeping cases
# related_remove <- related_epilepsy_status %>%
# filter(PI_HAT > 0.1875) %>%
# rowwise() %>%
# mutate(
# remove = case_when(
# Epilepsy_IID1 == 0 & Epilepsy_IID2 == 1 ~ IID1, # Prefer removing control (IID1)
# Epilepsy_IID1 == 1 & Epilepsy_IID2 == 0 ~ IID2, # Prefer removing control (IID2)
# Epilepsy_IID1 == 0 & Epilepsy_IID2 == 0 ~ sample(c(IID1, IID2), 1), # Randomly remove one if both are controls
# Epilepsy_IID1 == 1 & Epilepsy_IID2 == 1 ~ sample(c(IID1, IID2), 1) # Randomly remove one if both are cases
# )
# ) %>%
# select(remove)
#
# # Remove these individuals from df
# Df <- Df %>%
# anti_join(related_remove, by = join_by("IID" == "remove"))
save(Df, file = "Data/Df.Rdata")
# Data exploration before fitting regression models
# Check PGS before and after standardisation
Df %>%
summarise(
mean_PRS = mean(SCORESUM, na.rm = TRUE),
sd_PRS = sd(SCORESUM, na.rm = TRUE),
mean_PRS_standardised = mean(scale(SCORESUM), na.rm = TRUE),
sd_PRS_standardised = sd(scale(SCORESUM), na.rm = TRUE)) %>%
kable()
| mean_PRS | sd_PRS | mean_PRS_standardised | sd_PRS_standardised |
|---|---|---|---|
| -0.0193872 | 0.0116532 | 0 | 1 |
# Plot distribution of standardised PGS split by epilepsy case/control
means <- Df %>%
mutate(SCORESUM_scaled = scale(SCORESUM)) %>%
group_by(Epilepsy) %>%
summarise(mean_value = mean(SCORESUM_scaled, na.rm = TRUE))
labels <- c("1" = "Case", "0" = "Control")
ggplot(Df, aes(x = scale(SCORESUM), colour = as.factor(Epilepsy), fill = as.factor(Epilepsy))) +
geom_density(alpha = 0.6, position = "identity") +
geom_vline(data = means, aes(xintercept = mean_value, colour = as.factor(Epilepsy)),
linetype = "dashed", linewidth = 1, alpha = 1,
show.legend = F) +
scale_colour_viridis_d(name = "Epilepsy", labels = labels) +
scale_fill_viridis_d(name = "Epilepsy", labels = labels) +
scale_x_continuous(paste("Polygenic Score for epilepsy (standardised)")) +
scale_y_continuous("") +
theme_classic() +
theme(text = element_text(family = "Calibri"),
axis.text = element_text(size = 10),
axis.title.x = element_text(size = 12, colour = "black", margin = margin(10,0,0,0)),
axis.title.y = element_text(size = 12, colour = "black", margin = margin(0,10,0,0)),
legend.title = element_blank(),
legend.text = element_text(size = 10, colour = "black"),
legend.position = "right")
logistic_regression_covar <- glm(Epilepsy ~ Sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
family = binomial(link = 'logit'),
data = Df)
logistic_regression_PGS <- glm(Epilepsy ~ scale(SCORESUM) + Sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
family = binomial(link = 'logit'),
data = Df)
save(logistic_regression_covar, logistic_regression_PGS, file = "Results/logistic_regression_models.Rdata")
performance::check_model(logistic_regression_covar)
simulateResiduals(logistic_regression_covar, plot=TRUE)
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.5342695 0.3987193 0.7909382 0.9275532 0.9366849 0.5574282 0.5132182 0.3413922 0.7306458 0.4876829 0.9584471 0.6598171 0.3993855 0.294575 0.3646907 0.8758508 0.7393357 0.6822836 0.4953915 0.9085732 ...
performance::check_model(logistic_regression_PGS)
simulateResiduals(logistic_regression_PGS, plot=TRUE)
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.4588432 0.4242531 0.9105435 0.879569 0.9599435 0.6487582 0.6422395 0.4602313 0.5411003 0.6160205 0.9722981 0.712462 0.3517316 0.3558069 0.4031858 0.8432616 0.8202006 0.8273669 0.4903365 0.8996097 ...
tidy(logistic_regression_PGS, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE) %>%
filter(str_detect(term, "SCORESUM")) %>%
kable(caption = "Results from logistic regression (back-transformation conducted so estimate = odds ratio")
| term | estimate | std.error | statistic | p.value | conf.low | conf.high |
|---|---|---|---|---|---|---|
| scale(SCORESUM) | 1.893025 | 0.1284467 | 4.968408 | 7e-07 | 1.48026 | 2.451792 |
or_table <- tidy(logistic_regression_PGS, exponentiate = TRUE, conf.int = TRUE, conf.level = 0.95,) %>%
filter(str_detect(term, "SCORESUM")) %>%
mutate(
term = str_replace(term, "scale\\(SCORESUM\\)", "PGS"),
sig_label = ifelse(p.value < 0.05, "*", "")
)
ggplot(or_table, aes(x = term, y = estimate)) +
geom_hline(yintercept = 1, linetype = "dashed", color = "gray") +
geom_point() +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
scale_y_continuous("Odds Ratio") +
scale_x_discrete("Predictor") +
# geom_text(aes(label = sig_label, y = conf.high + 0.05), size = 6) +
theme_classic() +
theme(
text = element_text(family = "Calibri"),
axis.text = element_text(size = 10),
axis.title.x = element_text(size = 12, colour = "black", margin = margin(10, 0, 0, 0)),
axis.title.y = element_text(size = 12, colour = "black", margin = margin(0, 10, 0, 0)),
legend.title = element_blank(),
legend.text = element_text(size = 10, colour = "black"),
legend.position = "right"
)
r2_PGS <- r2_nagelkerke(logistic_regression_PGS) - r2_nagelkerke(logistic_regression_covar)
r2_PGS %>%
kable(caption = "Nagelkerke's R2 attributable to the PGS")
| x | |
|---|---|
| Nagelkerke’s R2 | 0.0836051 |
# Variance explained by the PGS on the liability scale
# Convert observed R2 to the liability scale using poopulation prevalence of your disease/condition
# Use R code from Lee SH, et al., A Better Coefficient of Determination for Genetic Profile Analysis. Genetic Epidemiology, 2012. 36(3):214-224.
# Made into a function
# lm = linear model
# K = population prevalence
# P = sample prevalence
mapToLiabilityScale = function(lm, K, P){
thd = qnorm(1 - K) # the threshold on the normal distribution which truncates the proportion of disease prevalence K
zv = dnorm(thd) # z (normal density)
mv = zv/K # mean liability for case
mv2 = -mv*K/(1-K) # mean liability for control
y = lm$model[[1]]
ncase = sum(y == 1)
nt = length(y)
ncont = nt - ncase
R2O = var(lm$fitted.values)/(ncase/nt*ncont/nt) # R2 on the observed scale
theta = mv*(P-K)/(1-K)*(mv*(P-K)/(1-K)-thd) # theta in equation 15 of the publication
cv = K*(1-K)/zv^2*K*(1-K)/(P*(1-P)) # C in equation 15 of the publication
R2 = R2O*cv/(1+R2O*theta*cv)
return(R2)
}
# Calculate difference in R2 on liability scale of full model - covariates only model to identify variance attributable to PGS
# Bootstrap to get 90% CI
# Doing a one-sided test of significance (> 0) therefore use a one-sided 95% CI (equivalent to just the lower bound of a 90% CI)
set.seed(123) # for reproducibility
n_boot <- 1000
r2_diffs <- numeric(n_boot)
for (i in 1:n_boot) {
# Resample data with replacement
boot_data <- Df[sample(1:nrow(Df), replace = TRUE), ]
# Fit models with resampled data
lm_covar <- lm(Epilepsy ~ Sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
data = boot_data)
lm_full <- lm(Epilepsy ~ SCORESUM + Sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
data = boot_data)
# Calculate sample prevalence in the bootstrap sample
ncase <- sum(boot_data$Epilepsy == 1)
P_boot <- ncase / nrow(boot_data)
# Liability R² (using lifetime population prevalence K as 0.012 (1.2%))
r2_covar <- mapToLiabilityScale(lm_covar, K = 0.012, P = P_boot)
r2_full <- mapToLiabilityScale(lm_full, K = 0.012, P = P_boot)
r2_diffs[i] <- r2_full - r2_covar
}
save(r2_diffs, file = "Results/r2_liability_attributable_PGS_bootstrapping.Rdata")
# Doing a one-sided test of significance (> 0) therefore use a one-sided 95% CI (equivalent to just the lower bound of a 90% CI)
n_boot <- 1000
tibble(
r2l_mean = mean(r2_diffs),
r2l_SE = sd(r2_diffs) / sqrt(n_boot),
r2l_Z = r2l_mean / r2l_SE,
r2l_p = pnorm(r2l_Z, mean = 0, sd = 1, lower.tail = F),
r2l_90perc_CI_lower = quantile(r2_diffs, 0.05),
r2l_90perc_CI_upper = quantile(r2_diffs, 0.95)) %>%
kable(caption = "Variance explained in epilepsy risk on the liability scale attributable to epilepsy PGS and one-sided test of significance if R2 (liability) attributable to PGS is > 0")
| r2l_mean | r2l_SE | r2l_Z | r2l_p | r2l_90perc_CI_lower | r2l_90perc_CI_upper |
|---|---|---|---|---|---|
| 0.0511901 | 0.0006007 | 85.21772 | 0 | 0.0247302 | 0.0859619 |
# Create deciles of PGS and set first decile as the reference/base group
Df_lowest <- Df %>%
mutate(
PGS_decile = ntile(scale(SCORESUM), 10),
PGS_decile = factor(PGS_decile, levels = 1:10)
)
# Create deciles of PGS and set middle deciles as the reference/base group
Df_mid <- Df %>%
mutate(
PGS_decile = ntile(scale(SCORESUM), 10),
PGS_decile = case_when(
PGS_decile %in% c("5", "6") ~ "5_6",
TRUE ~ as.character(PGS_decile)
),
PGS_decile = factor(PGS_decile, levels = c('5_6','1','2','3','4','7','8','9','10'))
)
# Run logistic regression with the PGS decile as the predictor
decile_model_lowest <- glm(Epilepsy ~ PGS_decile + Sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
family = binomial(link = 'logit'),
data = Df_lowest)
decile_model_mid <- glm(Epilepsy ~ PGS_decile + Sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6,
family = binomial(link = 'logit'),
data = Df_mid)
save(decile_model_lowest, decile_model_mid, file = "Results/decile_model.Rdata")
performance::check_model(decile_model_lowest)
simulateResiduals(decile_model_lowest, plot=TRUE)
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.4808425 0.4006835 0.9144018 0.9049724 0.9392691 0.6235637 0.6623095 0.4688741 0.5045213 0.638836 0.9609655 0.7475587 0.3199623 0.3425676 0.4193943 0.8649877 0.8471555 0.8116823 0.497919 0.9103658 ...
performance::check_model(decile_model_mid)
simulateResiduals(decile_model_mid, plot=TRUE)
## Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help.
##
## Scaled residual values: 0.4808425 0.4006835 0.9144018 0.9040315 0.9392691 0.6235637 0.6623095 0.4688741 0.5045213 0.638836 0.9609655 0.7159717 0.3199623 0.3425676 0.4193943 0.8649877 0.8471555 0.8116823 0.5231941 0.9085732 ...
tidy(decile_model_lowest, exponentiate = TRUE, conf.int = TRUE) %>%
filter(str_detect(term, "PGS_decile")) %>%
mutate(p.value_adjusted = p.adjust(p.value, method = "BH", n = 9)) %>%
kable(caption = "Results from logistic regression with decile of PGS as predictor and lowest decile as the reference (Back-transformation conducted so estimate = odds ratio. P-value adjusted for 9 comparisons with Benjamini-Hochberg method)")
| term | estimate | std.error | statistic | p.value | conf.low | conf.high | p.value_adjusted |
|---|---|---|---|---|---|---|---|
| PGS_decile2 | 2.128884 | 0.7436129 | 1.016118 | 0.3095734 | 0.5212012 | 10.67241 | 0.3095734 |
| PGS_decile3 | 4.183359 | 0.6993374 | 2.046387 | 0.0407183 | 1.1683779 | 19.82282 | 0.0732930 |
| PGS_decile4 | 2.412578 | 0.7444925 | 1.182948 | 0.2368297 | 0.5907471 | 12.11951 | 0.2664334 |
| PGS_decile5 | 2.556933 | 0.7286031 | 1.288504 | 0.1975704 | 0.6552924 | 12.57789 | 0.2540191 |
| PGS_decile6 | 3.934688 | 0.6991824 | 1.959191 | 0.0500905 | 1.0988201 | 18.63585 | 0.0751357 |
| PGS_decile7 | 4.434315 | 0.6948789 | 2.143357 | 0.0320845 | 1.2547005 | 20.89110 | 0.0721901 |
| PGS_decile8 | 7.801298 | 0.6804313 | 3.019100 | 0.0025353 | 2.3036791 | 36.09150 | 0.0114087 |
| PGS_decile9 | 5.646489 | 0.6851648 | 2.526449 | 0.0115222 | 1.6438500 | 26.27289 | 0.0345666 |
| PGS_decile10 | 12.551283 | 0.6748478 | 3.748731 | 0.0001777 | 3.7770468 | 57.71648 | 0.0015996 |
or_table <- tidy(decile_model_lowest, exponentiate = TRUE, conf.int = TRUE, conf.level = 0.95,) %>%
filter(str_detect(term, "PGS_decile")) %>%
add_row(
term = "PGS_decile1",
estimate = 1,
conf.low = 1,
conf.high = 1,
p.value = NA) %>%
mutate(
decile = str_remove(term, "PGS_decile"),
decile = factor(decile, levels = as.character(1:10)),
p.value_adjusted = p.adjust(p.value, method = "BH", n = 9),
sig_label = ifelse(p.value_adjusted < 0.05, "*", "")
)
a <- ggplot(or_table, aes(x = decile, y = estimate)) +
geom_hline(yintercept = 1, linetype = "dashed", color = "gray") +
geom_point() +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
scale_y_continuous("Odds Ratio") +
scale_x_discrete("PGS Decile") +
geom_text(aes(label = sig_label, y = conf.high + 1), size = 6) +
theme_classic() +
theme(
text = element_text(family = "Calibri"),
axis.text = element_text(size = 10),
axis.title.x = element_text(size = 12, colour = "black", margin = margin(10, 0, 0, 0)),
axis.title.y = element_text(size = 12, colour = "black", margin = margin(0, 10, 0, 0)),
legend.title = element_blank(),
legend.text = element_text(size = 10, colour = "black"),
legend.position = "right"
)
tidy(decile_model_mid, exponentiate = TRUE, conf.int = TRUE) %>%
filter(str_detect(term, "PGS_decile")) %>%
mutate(p.value_adjusted = p.adjust(p.value, method = "BH", n = 8)) %>%
kable(caption = "Results from logistic regression with decile of PGS as predictor and middle deciles (5th and 6th) as the reference (Back-transformation conducted so estimate = odds ratio. P-value adjusted for 9 comparisons with Benjamini-Hochberg method)")
| term | estimate | std.error | statistic | p.value | conf.low | conf.high | p.value_adjusted |
|---|---|---|---|---|---|---|---|
| PGS_decile1 | 0.3103084 | 0.6577260 | -1.7791427 | 0.0752164 | 0.0693238 | 0.9966392 | 0.2005770 |
| PGS_decile2 | 0.6602868 | 0.5188219 | -0.8000453 | 0.4236845 | 0.2216661 | 1.7482806 | 0.5743983 |
| PGS_decile3 | 1.2986998 | 0.4502903 | 0.5804336 | 0.5616222 | 0.5231437 | 3.1045951 | 0.5743983 |
| PGS_decile4 | 0.7483146 | 0.5162735 | -0.5615858 | 0.5743983 | 0.2530874 | 1.9749678 | 0.5743983 |
| PGS_decile7 | 1.3746537 | 0.4426014 | 0.7189354 | 0.4721807 | 0.5650837 | 3.2476429 | 0.5743983 |
| PGS_decile8 | 2.4158366 | 0.4204529 | 2.0978466 | 0.0359187 | 1.0550577 | 5.5355786 | 0.1436748 |
| PGS_decile9 | 1.7473400 | 0.4273181 | 1.3060402 | 0.1915389 | 0.7486755 | 4.0388934 | 0.3830778 |
| PGS_decile10 | 3.8839014 | 0.4097572 | 3.3113274 | 0.0009285 | 1.7515447 | 8.7914692 | 0.0074284 |
or_table <- tidy(decile_model_mid, exponentiate = TRUE, conf.int = TRUE, conf.level = 0.95,) %>%
filter(str_detect(term, "PGS_decile")) %>%
add_row(
term = "PGS_decile5",
estimate = 1,
conf.low = 1,
conf.high = 1,
p.value = NA) %>%
mutate(
decile = str_remove(term, "PGS_decile"),
decile = factor(decile, levels = as.character(1:10)),
p.value_adjusted = p.adjust(p.value, method = "BH", n = 9),
sig_label = ifelse(p.value_adjusted < 0.05, "*", "")
)
a <- ggplot(or_table, aes(x = decile, y = estimate)) +
geom_hline(yintercept = 1, linetype = "dashed", color = "gray") +
geom_point() +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2) +
scale_y_continuous("Odds Ratio") +
scale_x_discrete("PGS Decile",
labels = c('1', '2', '3', '4', '5 & 6', '7', '8', '9', '10')) +
geom_text(aes(label = sig_label, y = conf.high + 1), size = 6) +
theme_classic() +
theme(
text = element_text(family = "Calibri"),
axis.text = element_text(size = 10),
axis.title.x = element_text(size = 12, colour = "black", margin = margin(10, 0, 0, 0)),
axis.title.y = element_text(size = 12, colour = "black", margin = margin(0, 10, 0, 0)),
legend.title = element_blank(),
legend.text = element_text(size = 10, colour = "black"),
legend.position = "right"
)
model_PGS <- glm(Epilepsy ~ scale(SCORESUM),
family = binomial(link = 'logit'),
data = Df)
roc_model_PGS <- roc(Df$Epilepsy, model_PGS$fitted.values)
save(roc_model_PGS, file = "Results/roc_model_PGS.Rdata")
# Doing a one-sided test of significance (> 0.5) therefore use a one-sided 95%CI (equivalent to just the lower bound of a 90% CI)
tibble(
AUC = as.numeric(auc(roc_model_PGS)),
AUC_SE = sqrt(var(roc_model_PGS)),
AUC_Z = (AUC - 0.5) / AUC_SE,
AUC_p = pnorm(AUC_Z, mean = 0, sd = 1, lower.tail = F),
AUC_90perc_CI_lower = as.numeric(ci(roc_model_PGS, conf.level = 0.90))[1]) %>%
kable(caption = "AUC results and one-sided test of significance if AUC is > 0.5")
| AUC | AUC_SE | AUC_Z | AUC_p | AUC_90perc_CI_lower |
|---|---|---|---|---|
| 0.6564001 | 0.0303257 | 5.157345 | 1e-07 | 0.6065188 |
plot(roc_model_PGS,
print.auc = TRUE,
main = paste("ROC curve for PGS of epilepsy"))
# Use logistic regression models to run ROC analysis for covariates only and covariats + PGS
roc_model_covar <- roc(Df$Epilepsy, logistic_regression_covar$fitted.values)
roc_model_full <- roc(Df$Epilepsy, logistic_regression_PGS$fitted.values)
save(roc_model_covar, roc_model_full, file = "Results/roc_models_covar_full.Rdata")
# DeLong's test for two correlated ROC curves
sig_test <- roc.test(roc_model_full, roc_model_covar)
tibble(
AUC_covariates_only = as.numeric(auc(roc_model_covar)),
AUC_covariates_plus_PGS = as.numeric(auc(roc_model_full)),
AUC_change = AUC_covariates_only - AUC_covariates_plus_PGS,
AUC_change_Z = sig_test[["statistic"]][["Z"]],
AUC_change_p = sig_test[["p.value"]],
AUC_change_CI_lower = sig_test$conf.int[1],
AUC_change_CI_upper = sig_test$conf.int[2]) %>%
kable(caption = "AUC results and DeLong's test for two correlated ROC curves (does adding PGS to analysis significanlty increase AUC?)")
| AUC_covariates_only | AUC_covariates_plus_PGS | AUC_change | AUC_change_Z | AUC_change_p | AUC_change_CI_lower | AUC_change_CI_upper |
|---|---|---|---|---|---|---|
| 0.5949923 | 0.6779254 | -0.0829331 | 2.706189 | 0.006806 | 0.0228686 | 0.1429975 |
roc_covar_df <- ggroc(roc_model_covar) %>%
.[["data"]] %>%
mutate(model = "covar")
roc_full_df <- ggroc(roc_model_full) %>%
.[["data"]] %>%
mutate(model = "full")
roc_combined_df <- roc_covar_df %>%
bind_rows(roc_full_df)
auc_covar <- round(auc(roc_model_covar), 3)
auc_full <- round(auc(roc_model_full), 3)
ggplot(roc_combined_df, aes(x = specificity, y = sensitivity, colour = model, linetype = model)) +
geom_line() +
scale_x_reverse("Specificity") +
scale_y_continuous("Sensitivity") +
scale_linetype_manual(values = c("covar" = "solid", "full" = "dashed")) +
scale_colour_manual(values = c("covar" = "#160F3BFF", "full" = "#F4685CFF")) +
annotate("text",
x = max(roc_combined_df$specificity, na.rm = TRUE) - 0.1,
y = min(roc_combined_df$sensitivity, na.rm = TRUE) + 0.05, hjust = 0,
label = paste("AUC (Covariates only) =", auc_covar), color = "#160F3BFF") +
annotate("text",
x = max(roc_combined_df$specificity, na.rm = TRUE) - 0.1,
y = min(roc_combined_df$sensitivity, na.rm = TRUE) + 0.05 + 0.05,
hjust = 0, label = paste("AUC (Covariates + PGS) =", auc_full), color = "#F4685CFF") +
theme_classic() +
theme(text = element_text(family = "Calibri"),
axis.text = element_text(size = 10),
axis.title.x = element_text(size = 12, colour = "black", margin = margin(10,0,0,0)),
axis.title.y = element_text(size = 12, colour = "black", margin = margin(0,10,0,0)),
legend.title = element_blank(),
legend.text = element_text(size = 10, colour = "black"),
legend.position = "none")
## ----